/*
Copyright 2008-2009 Elöd Egyed-Zsigmond, Cyril Laitang
Copyright 2009-2011 Samuel Gesche

This file is part of IPRI News Analyzer.

IPRI News Analyzer is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

IPRI News Analyzer is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with IPRI News Analyzer.  If not, see <http://www.gnu.org/licenses/>.
*/

package proc.rss;

import data.base.Database;
import data.base.NoBaseException;
import data.base.connectors.RSSFeedDatabase;
import data.structures.rss.RSSItem;
import data.structures.tagging.LemmaVector;
import data.structures.tagging.LemmaInfos;

import proc.tagging.TreeTagger;
import proc.text.XMLCleaner;

import java.util.HashMap;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Set;
import java.util.HashSet;
import java.util.Vector;


/**
 * Writes the lemmas of an RSS item (title and description) to the lemma table
 * through {@link RSSFeedDatabase}.
 */
public class RSSLemmatizer {

    /**
     * Separator placed between the fields of a de-duplication key so that field
     * boundaries stay unambiguous (e.g. "ab"+"c" must not equal "a"+"bc").
     * Assumes lemma and lex strings never contain a NUL character.
     */
    private static final char KEY_SEPARATOR = '\u0000';

    private final Database theDB;

    /**
     * @param db the database handle handed to every {@link RSSFeedDatabase}
     *           connector this object creates
     */
    public RSSLemmatizer(Database db) {
        theDB = db;
    }

    /**
     * Adds the lemmas corresponding to the given item, taken from the two vectors.
     *
     * <p>Each distinct (lemma, lex, title count, description count) combination is
     * inserted at most once. A lemma found in the title is inserted with its title
     * count and its description count (0 when absent from the description); a lemma
     * found only in the description is inserted with a title count of 0.
     *
     * <p>Insertion is best-effort: a failing insert is reported on standard output
     * and the remaining lemmas are still processed.
     *
     * @param itemId id of the RSS item the lemmas belong to
     * @param title  lemmas of the item title
     * @param desc   lemmas of the item description
     * @throws NoBaseException declared by the original signature; insert failures
     *                         themselves are caught and logged, not propagated
     */
    public void addLemme(int itemId, LemmaVector title, LemmaVector desc) throws NoBaseException {
        RSSFeedDatabase rS = new RSSFeedDatabase(theDB);
        HashMap<LemmaInfos, Integer> lemmasCountTitle = title.toLemmasCount();
        HashMap<LemmaInfos, Integer> lemmasCountDesc = desc.toLemmasCount();
        // NOTE(review): lemmasPos is never read afterwards. It is kept because it is
        // not visible from here whether toLemmasPos() returns a fresh table or shared
        // state that callers expect to be merged -- confirm, then remove.
        Hashtable<String, String> lemmasPos = title.toLemmasPos();
        lemmasPos.putAll(desc.toLemmasPos());
        Set<String> dejaFaits = new HashSet<String>();

        // for each title lemma
        for (LemmaInfos info : title.getLemmas()) {
            Integer countTitle = lemmasCountTitle.get(info);
            Integer countDesc = lemmasCountDesc.get(info);
            // Set.add returns false when the key was already handled.
            if (!dejaFaits.add(bluePrint(info, countTitle, countDesc))) {
                continue;
            }
            if (lemmasCountDesc.containsKey(info)) {
                // present in both the title and the description
                insertLemma(rS, itemId, info, countTitle, countDesc,
                        "Lemme title+desc insert problem :");
            } else {
                // present in the title only
                insertLemma(rS, itemId, info, countTitle, 0,
                        "Lemme title insert problem :");
            }
        }

        // for each description lemma
        for (LemmaInfos info : desc.getLemmas()) {
            Integer countTitle = lemmasCountTitle.get(info);
            Integer countDesc = lemmasCountDesc.get(info);
            if (!dejaFaits.add(bluePrint(info, countTitle, countDesc))) {
                continue;
            }
            // Lemmas that also occur in the title were handled by the first loop.
            if (!lemmasCountTitle.containsKey(info)) {
                insertLemma(rS, itemId, info, 0, countDesc,
                        "Lemme desc insert problem :");
            }
        }
    }

    /** Builds the de-duplication key of a lemma: lemma, lex and both counts, separated. */
    private static String bluePrint(LemmaInfos info, Integer countTitle, Integer countDesc) {
        return info.get_lemma() + KEY_SEPARATOR + info.get_lex()
                + KEY_SEPARATOR + countTitle + KEY_SEPARATOR + countDesc;
    }

    /**
     * Inserts one lemma row and reports (without propagating) any failure.
     *
     * <p>The counts are passed boxed so that any unboxing happens inside the
     * {@code try}, exactly where the inline calls used to perform it.
     */
    private static void insertLemma(RSSFeedDatabase rS, int itemId, LemmaInfos info,
            Integer countTitle, Integer countDesc, String errorPrefix) {
        try {
            rS.InsertLemma(itemId, info.get_lemma(), countTitle, countDesc, info.get_lex());
        } catch (Exception ex) {
            //debug
            // Plain concatenation: the message must not be interpreted as a format
            // string, since a '%' in it would make String.format throw.
            System.out.println(errorPrefix + ex.getMessage());
        }
    }

    // Disabled legacy code, kept for reference.

    /// lematized all the RSS items
    /*public void reLemmatizedAll() {
        //Get the list of all RSS feeds from the database
        RSSFeedDatabase rS = new RSSFeedDatabase(theDB);
        TreeTagger tt = new TreeTagger();
       
        try{
            rS.removeAllLemme();
            Set<RSSItem> items = rS.getAllRssItem();
            int position = 0;
            // for all the items  
            for (Iterator iter = items.iterator(); iter.hasNext();){
                position++;
                RSSItem item = (RSSItem) iter.next();
                // tag the text with treetagger
                LemmaVector lemmasTitle = tt.processText(XMLCleaner.xmlToText(item.getTitle()));
                LemmaVector lemmasDesc = tt.processText(XMLCleaner.xmlToText(item.getDescription()));
                // filter the lemmas 
                lemmasTitle.filtre();
                lemmasDesc.filtre();
                // updat lemmas fields
                item.setTitleLemmes(lemmasTitle.toLemmas());
                item.setDescLemmes(lemmasDesc.toLemmas());
                // update the two RSS_items field that contains lemmas 
                rS.UpdateLemmas(item);
                // add the lemmas to the lemma table
                addLemme(item.getId(),lemmasTitle,lemmasDesc);
            }
        }catch (Exception ex){
            //debug
            System.out.println(String.format("Lemme desc insert problem :"+ex.getMessage()));
        }
    }*/

   /* public int LemmatizedDate(java.util.Date startDate, java.util.Date endDate) {
        //Get the list of all RSS feeds from the database
        RSSFeedDatabase rS = new RSSFeedDatabase(theDB);
        return null;        
    }

    public int LemmatizedUnTreated() {
        //Get the list of all RSS feeds from the database
        RSSFeedDatabase rS = new RSSFeedDatabase(theDB);
        return null;

    }*/
}
